**********Generate consolidated variables -02***************
clear all


*************
* Stock market dataset
*************
* Reads the daily stock file, derives two firm-year market dummies (sse, o_m)
* and saves the result as stockmarket_pri.dta. Rows are not collapsed here;
* sse and o_m are constant within peorgnr_LopnNr x year.
use "kth_stock_202211.dta", clear
table marketname
* SSEFN is the small market named SSE First North
gen year = year(day)
table year
* The two order commands are kept in this sequence because the variable
* ranges in the drop below depend on the resulting column order.
order peorgnr_LopnNr number_inst year
gsort peorgnr_LopnNr number_inst year
order peorgnr_LopnNr year marketname
drop number_inst share_type-currency askad-marketvaluecurrency

* sse is a dummy that indicates whether the stock was traded on the SSE market.
* If a firm has several market names within a year, the market on the earliest
* trading day of that year decides. If any row on that earliest day is on
* "SSE", sse is 1 for the whole firm-year.
*
* The earliest day is taken directly from the numeric date. All days inside a
* peorgnr_LopnNr x year group share the same calendar year, so min(day) picks
* the same row as a month-day key would, and it does not break when day is
* missing (a string-built key would become non-numeric in that case).
gen byte sse_market = marketname == "SSE"
bysort peorgnr_LopnNr year: egen first_day = min(day)
gen byte sse_n = sse_market if day == first_day
bysort peorgnr_LopnNr year: egen sse = max(sse_n)

* o_m is a dummy that indicates whether the stock was traded on another
* (small) market: it is 1 when sse is 0 and 0 when sse is 1.
gen o_m = sse == 0

sort peorgnr_LopnNr year
* Drop the helper variables by name so the drop does not depend on column order.
drop sse_market first_day sse_n
save "stockmarket_pri.dta", replace

*************
* Bankruptcy variable
*************
* Takes the bankruptcy flag from the Serrano file, attaches the stock market
* rows saved in stockmarket_pri.dta, and reduces the result to one row per
* peorgnr_LopnNr x year x sse. Output: stockmarket_final.dta.
use "serrano_2020.dta", clear
keep Lopnr_PeOrgNr ser_year bol_konkurs
* bol_konkurs indicates whether the business went bankrupt in this year (1 = yes)

* Align the key names with the stock market dataset.
rename Lopnr_PeOrgNr peorgnr_LopnNr
rename ser_year year

* Merge with the stock market dataset. Unmatched rows from both sides are kept.
merge 1:m peorgnr_LopnNr year using "stockmarket_pri.dta", nogenerate
gsort peorgnr_LopnNr year
codebook bol_konkurs

* Keep the first row of every peorgnr_LopnNr x year x sse combination.
* force is needed because the rows still differ in other variables.
duplicates drop peorgnr_LopnNr year sse, force
drop marketname day

label variable bol_konkurs "whether in this year the business went bankrupt"
label variable sse "individual identifier - if the stock was traded on the SSE market"
label variable o_m "individual identifier - if the stock was traded on other small markets"
save "stockmarket_final.dta", replace


************
* The age of the company
************
* Derives one founding year per firm from the Serrano age file and merges it
* onto stockmarket_final.dta. Output: stockmarket_age.dta.
use "age_serrano.dta", clear

* Registration year = first four characters of ser_regdat.
gen year = substr(ser_regdat,1,4)

* c = number of distinct registration years per firm (diagnostic only).
* NOTE(review): nvals() is not an official egen function; presumably it comes
* from the user-written egenmore package - confirm it is installed.
bysort peorgnr_LopnNr: egen c = nvals(year)
codebook c
destring year, replace

* m_y = first year with observable data, e_y = earliest registration year.
bysort peorgnr_LopnNr: egen m_y = min(ser_year)
bysort peorgnr_LopnNr: egen e_y = min(year)

* The founding year is the earliest registration year. After checking the
* dataset, the first observation in the Serrano dataset sometimes appears
* before the registration year (usually by one year); in that case the first
* year with observable data is used instead. Both rules together are the
* smaller of e_y and m_y; min() ignores a missing argument, so a firm with
* only one of the two years available gets that year.
gen founded = min(e_y, m_y)

keep peorgnr_LopnNr ser_year founded
rename ser_year year

* Diagnostic: number of distinct founding years per firm.
bysort peorgnr_LopnNr: egen check = nvals(founded)
codebook check
drop check

merge 1:1 peorgnr_LopnNr year using "stockmarket_final.dta", nogenerate
save "stockmarket_age.dta", replace

* Credit score file: rebuild year from period, merge with the emissions file
* and keep every emissions row (matched or not). Output: knc_new.dta.
use "uc_credit_score.dta", clear
rename peorgnr_LopnNr bidnr
duplicates drop bidnr period riskklass, force

* Replace the existing year variable with the first four characters of period.
drop year
gen year = substr(period,1,4)
destring year, replace

* keep(match using) discards rows that exist only in the credit score file.
merge 1:1 bidnr year using "emissions_req_KNC_final.dta", keep(match using) nogenerate
drop c_uniq

order PeOrgNrK_LopNr year
sort PeOrgNrK_LopNr year
save "knc_new", replace


* Merge with the emissions dataset
* Attaches the firm-level variables in stockmarket_age.dta to knc_new and
* builds the group-level (PeOrgNrK_LopNr) variables.
* Output: stockmarket_age_KNC.dta.
clear all
use "stockmarket_age.dta", clear
* Our emissions dataset has an observable period until 2016
drop if year > 2016
rename peorgnr_LopnNr bidnr
merge 1:1 bidnr year using "knc_new"

* Group identifiers:
* 1. con_founded_n is the group-level age identifier: the earliest founding
*    year among all bidnrs within the group.
* 2. con_sse is a dummy that indicates whether the group's stocks were traded
*    on the SSE market. If a group (PeOrgNrK_LopNr) was traded on different
*    markets, the largest market is chosen (one instrument on SSE is enough
*    for con_sse to be 1). con_o_m is built the same way from o_m.
* 3. con_bankrup indicates whether the group went bankrupt in this year (1 = yes).
*
* These are computed before the unmatched stock-market rows are dropped, so
* founded_n can use a founding year recorded on any row of the same bidnr.
bysort bidnr: egen founded_n = max(founded)
bysort PeOrgNrK_LopNr: egen con_founded_n = min(founded_n)
bysort PeOrgNrK_LopNr year: egen con_sse = max(sse)
bysort PeOrgNrK_LopNr year: egen con_o_m = max(o_m)
bysort PeOrgNrK_LopNr year: egen con_bankrup = max(bol_konkurs)

* Keep only rows that are present in knc_new (matched or using-only).
keep if inlist(_merge, 2, 3)
drop _merge

label variable bol_konkurs "whether in this year the business went bankrupt, if it is 1, then yes"
label variable con_founded_n "consolidated founded year"
label variable con_sse "group identifier - if the stock was traded on the SSE market"
label variable con_o_m "group identifier - if the stock was traded on other small markets"
label variable con_bankrup "consolidated bankruptcy variable"
order PeOrgNrK_LopNr bidnr year
sort PeOrgNrK_LopNr year
save "stockmarket_age_KNC.dta", replace

* Reconcile the founding year with the first observed year, then refresh the
* group-level founding year. Overwrites stockmarket_age_KNC.dta.
use "stockmarket_age_KNC.dta", clear

* If the founding year is later than the earliest year with observable data,
* use that earliest year as the firm's founding year. Rows without a founding
* year are left untouched.
bysort bidnr: egen y_id = min(year)
replace founded_n = y_id if !(founded_n <= y_id) & founded_n != .

* con_founded_n is the group-level age identifier: the earliest founding year
* among all firms that belong to the group. Lower it wherever the corrected
* firm-level years give an earlier value.
bysort PeOrgNrK_LopNr: egen con_founded_n1 = min(founded_n)
replace con_founded_n = con_founded_n1 if con_founded_n > con_founded_n1

drop y_id con_founded_n1
label variable con_founded_n "consolidated founded year"
order PeOrgNrK_LopNr-year period-f_y1
save "stockmarket_age_KNC.dta", replace


* Merge it with the emissions dataset
* Keeps only bidnr x year rows found in both files, then adds the two
* industry-level (sni2007_4d) files.
* Output: master_file_with_fuels_emissions_w_inv_v2_new.dta.
use "stockmarket_age_KNC.dta", clear
* Earlier source of this merge, kept for reference:
* merge 1:1 bidnr year using "\\micro.intra\projekt\P0789$\P0789_Gem\Gustav\Material for inspection\magnitude_tax_20220522_emission_req.dta"

merge 1:1 bidnr year using "master_file_with_fuels_emissions_w_inv.dta", keep(match) nogenerate
gsort PeOrgNrK_LopNr year
* No group-level founding year for rows without a firm identifier.
replace con_founded_n = . if bidnr == .
drop founded
* Intermediate save; the same file is overwritten again below.
save "master_file_with_fuels_emissions_w_inv_v2_new.dta", replace


* Industry-level additions: rows that exist only in the using files are dropped.
merge m:1 sni2007_4d using "PACE_realcapstock_2.dta", keep(master match) nogenerate

merge m:1 sni2007_4d using "PACE_sni2007_stat_2.dta", keep(master match) nogenerate
save "master_file_with_fuels_emissions_w_inv_v2_new.dta", replace


